### Replication code
### Article: "Turnover: How lame-duck governments disrupt the bureaucracy and service delivery before leaving office"
### Author: Guillermo Toral (www.guillermotoral.com)
### Date: July 2023
### This file prepares the dataset on municipal elections
### This file uses the electionsBR package, which loads and organizes election data published by Brazil's Supreme Electoral Court (TSE). Download instructions are in the README file
### R version, platform, and package versions reported at the end of the file

# Prepare the environment -------------------------------------------------

### This section of the code prepares the environment 

# Start from an empty global environment so that no objects from earlier sessions leak into this run
rm(list = ls())

# Packages this script depends on; any that are not yet installed are installed before loading
package_list <- c("tidyverse", "here", "electionsBR")
packages_to_install <- setdiff(package_list, installed.packages()[, "Package"])
if (length(packages_to_install) > 0) {
  install.packages(packages_to_install)
}

# Load required packages
library(tidyverse)
library(here)
library(electionsBR)

# Set the working directory to the project root reported by here(). The relative paths used below assume that this root is the folder where this file is located.
setwd(here::here())

# Load electoral data -------------------------------

# Load data on candidates and their performance in local elections from 2000 to 2016, from the site of Brazil's Supreme Electoral Court (TSE): http://www.tse.jus.br/eleicoes/estatisticas/repositorio-de-dados-eleitorais-1/repositorio-de-dados-eleitorais

load(file = "../../datasets/downloaded/elections/tse_elections_2000_to_2016.RData") # objects whose names start with d hold data on candidates, and those starting with v hold candidates' electoral performance. The two digits are the election year, from 2000 (00) to 2016 (16)

# Extract data on candidates ----------------------------------------------

# Candidacy situations that disqualify a candidate from the analysis: the candidacy was not validated
# or was cancelled by the electoral justice, or the candidate died
invalid <- c(
  "INDEFERIDO",
  "INDEFERIDO POR IMPUGNAÇÃO",
  "FALECIDO",
  "CASSADO",
  "CASSAÇÃO DO REGISTRO",
  "CANCELAMENTO",
  "CANCELADO",
  "INELEGÍVEL"
)

# Identify mayoral candidates running for the 2016 - 2020 term
candidates_1620 <- d16 %>%
  # Keep only candidates for mayor whose candidacy is not in one of the invalid categories
  dplyr::filter(DESCRICAO_CARGO == "PREFEITO",
                !(DES_SITUACAO_CANDIDATURA %in% invalid)) %>%
  # Keep and rename variables of interest
  dplyr::select(code_municipality_tse = SIGLA_UE, # TSE's municipality code
                code_candidate = SEQUENCIAL_CANDIDATO, # TSE's candidate code
                cpf_candidate = CPF_CANDIDATO, # Candidate's CPF (a government unique ID for individuals, similar to the social security number in the US)
                code_party = NUMERO_PARTIDO, # TSE's party code
                name_party = SIGLA_PARTIDO, # Party name
                round = NUM_TURNO, # Round of elections
                candidate_situation = DES_SITUACAO_CANDIDATURA, # Situation of the candidacy
                candidate_result = DESC_SIT_TOT_TURNO, # Result obtained by the candidate
                election_description = DESCRICAO_ELEICAO) # Description of the election
# Mayors are the candidates recorded as elected
mayors_1620 <- dplyr::filter(candidates_1620, candidate_result == "ELEITO")
# Rows of second and later elected mayors within the same municipality
repeated_muns <- mayors_1620[duplicated(mayors_1620$code_municipality_tse), ] # author's note: 28
# All elected-mayor rows from the municipalities flagged above
repeated_elections <- mayors_1620[mayors_1620$code_municipality_tse %in% repeated_muns$code_municipality_tse, ]
# Among those, winners of any election other than the regular 2016 one are treated as supplementary-election winners
supplementary_elections_1620 <- subset(repeated_elections, election_description != "Eleições Municipais 2016")
supplementary_elections_1620[["supplementary_election"]] <- 1
# Drop every candidate from municipalities with repeated elections
candidates_1620 <- dplyr::filter(candidates_1620,
                                 !(code_municipality_tse %in% repeated_elections$code_municipality_tse))
candidates_1620[["supplementary_election"]] <- 0
# Add back the winners of the supplementary elections (only the elected rows are re-added)
candidates_1620 <- rbind(candidates_1620, supplementary_elections_1620)
# Rebuild the mayors file from the cleaned list of candidates
mayors_1620 <- dplyr::filter(candidates_1620, candidate_result == "ELEITO")

# Identify mayoral candidates running for the 2012 - 2016 term
candidates_1216 <- d12 %>%
  # Keep only candidates for mayor whose candidacy is not in one of the invalid categories
  dplyr::filter(DESCRICAO_CARGO == "PREFEITO",
                !(DES_SITUACAO_CANDIDATURA %in% invalid)) %>%
  # Keep and rename variables of interest
  dplyr::select(code_municipality_tse = SIGLA_UE, # TSE's municipality code
                code_candidate = SEQUENCIAL_CANDIDATO, # TSE's candidate code
                cpf_candidate = CPF_CANDIDATO, # Candidate's CPF (a government unique ID for individuals, similar to the social security number in the US)
                code_party = NUMERO_PARTIDO, # TSE's party code
                name_party = SIGLA_PARTIDO, # Party name
                round = NUM_TURNO, # Round of elections
                candidate_situation = DES_SITUACAO_CANDIDATURA, # Situation of the candidacy
                candidate_result = DESC_SIT_TOT_TURNO, # Result obtained by the candidate
                election_description = DESCRICAO_ELEICAO) # Description of the election
# Mayors are the candidates recorded as elected
mayors_1216 <- dplyr::filter(candidates_1216, candidate_result == "ELEITO")
# Rows of second and later elected mayors within the same municipality
repeated_muns <- mayors_1216[duplicated(mayors_1216$code_municipality_tse), ] # author's note says 28, the same figure as in the 2016 block -- verify
# All elected-mayor rows from the municipalities flagged above
repeated_elections <- mayors_1216[mayors_1216$code_municipality_tse %in% repeated_muns$code_municipality_tse, ]
# Among those, winners of any election other than the regular 2012 one are treated as supplementary-election winners
supplementary_elections_1216 <- subset(repeated_elections, election_description != "ELEIÇÃO MUNICIPAL 2012")
supplementary_elections_1216[["supplementary_election"]] <- 1
# Drop every candidate from municipalities with repeated elections
candidates_1216 <- dplyr::filter(candidates_1216,
                                 !(code_municipality_tse %in% repeated_elections$code_municipality_tse))
candidates_1216[["supplementary_election"]] <- 0
# Add back the winners of the supplementary elections (only the elected rows are re-added)
candidates_1216 <- rbind(candidates_1216, supplementary_elections_1216)
# Remove one municipality for which we observe two supplementary elections -- we do not know which one is valid
candidates_1216 <- candidates_1216[which(candidates_1216$code_municipality_tse != "05312"), ]
# Rebuild the mayors file from the cleaned list of candidates
mayors_1216 <- dplyr::filter(candidates_1216, candidate_result == "ELEITO")

# Identify mayoral candidates running for the 2008 - 2012 term
candidates_0812 <- d08 %>%
  # Keep only candidates for mayor whose candidacy is not in one of the invalid categories
  dplyr::filter(DESCRICAO_CARGO == "PREFEITO",
                !(DES_SITUACAO_CANDIDATURA %in% invalid)) %>%
  # Keep and rename variables of interest
  dplyr::select(code_municipality_tse = SIGLA_UE, # TSE's municipality code
                code_candidate = SEQUENCIAL_CANDIDATO, # TSE's candidate code
                cpf_candidate = CPF_CANDIDATO, # Candidate's CPF (a government unique ID for individuals, similar to the social security number in the US)
                code_party = NUMERO_PARTIDO, # TSE's party code
                name_party = SIGLA_PARTIDO, # Party name
                round = NUM_TURNO, # Round of elections
                candidate_situation = DES_SITUACAO_CANDIDATURA, # Situation of the candidacy
                candidate_result = DESC_SIT_TOT_TURNO, # Result obtained by the candidate
                election_description = DESCRICAO_ELEICAO) # Description of the election
# Mayors are the candidates recorded as elected
mayors_0812 <- dplyr::filter(candidates_0812, candidate_result == "ELEITO")
# Rows of second and later elected mayors within the same municipality
repeated_muns <- mayors_0812[duplicated(mayors_0812$code_municipality_tse), ] # author's note says 28, the same figure as in the 2016 block -- verify
# All elected-mayor rows from the municipalities flagged above
repeated_elections <- mayors_0812[mayors_0812$code_municipality_tse %in% repeated_muns$code_municipality_tse, ]
# Among those, winners of the elections labelled as supplementary are kept as supplementary-election winners
supplementary_elections_0812 <- subset(repeated_elections, election_description == "ELEIÇÕES SUPLEMENTARES 2008")
supplementary_elections_0812[["supplementary_election"]] <- 1
# Drop every candidate from municipalities with repeated elections
candidates_0812 <- dplyr::filter(candidates_0812,
                                 !(code_municipality_tse %in% repeated_elections$code_municipality_tse))
candidates_0812[["supplementary_election"]] <- 0
# Add back the winners of the supplementary elections (only the elected rows are re-added)
candidates_0812 <- rbind(candidates_0812, supplementary_elections_0812)
# Remove two municipalities for which we observe two supplementary elections (we do not know which one is valid)
candidates_0812 <- candidates_0812[which(candidates_0812$code_municipality_tse != "09156" &
                                           candidates_0812$code_municipality_tse != "43494"), ]
# Rebuild the mayors file from the cleaned list of candidates
mayors_0812 <- dplyr::filter(candidates_0812, candidate_result == "ELEITO")

# Identify mayoral candidates running for the 2004 - 2008 term
candidates_0408 <- d04 %>%
  # Keep only candidates for mayor whose candidacy is not in one of the invalid categories
  dplyr::filter(DESCRICAO_CARGO == "PREFEITO" & # Retain only candidates for mayoral office
                  !(DES_SITUACAO_CANDIDATURA %in% invalid) # And whose candidacy was not invalid
                # DESC_SIT_TOT_TURNO != "#NULO#"
  ) %>% # NOTE: the filter on results reported as "#NULO#" is commented out above, so those candidates are retained
  # Keep and rename variables of interest
  dplyr::select(code_municipality_tse = SIGLA_UE, # TSE's municipality code
                code_candidate = SEQUENCIAL_CANDIDATO, # TSE's candidate code
                cpf_candidate = CPF_CANDIDATO, # Candidate's CPF (a government unique ID for individuals, similar to the social security number in the US)
                code_party = NUMERO_PARTIDO, # TSE's party code
                name_party = SIGLA_PARTIDO, # Party name
                round = NUM_TURNO, # Round of elections
                candidate_situation = DES_SITUACAO_CANDIDATURA, # Situation of the candidacy
                candidate_result = DESC_SIT_TOT_TURNO, # Result obtained by the candidate
                election_description = DESCRICAO_ELEICAO) %>% # Description of the election
  # Clean CPF codes for ALL candidates, not only for the elected ones: the incumbent CPFs that these
  # candidates are later matched against (mayors elected in 2000) are left-padded to 11 characters, so
  # leaving the candidate CPFs unpadded would make incumbents whose CPF starts with zero impossible to match.
  # The "#NULO#" placeholder is left untouched so that it can still be recognised below.
  dplyr::mutate(cpf_candidate = ifelse(cpf_candidate %in% "#NULO#",
                                       cpf_candidate,
                                       str_pad(cpf_candidate, 11, side = "left", pad = "0")))
# No supplementary elections reported in 2004
candidates_0408$supplementary_election <- 0
# Identify those who were elected as mayors
mayors_0408 <- candidates_0408 %>%
  dplyr::filter(candidate_result == "ELEITO") # Keep elected ones
sum(duplicated(mayors_0408$code_municipality_tse)) # We do not observe two elected mayors for the same municipality, consistent with no supplementary elections
# Drop mayors without a CPF (their CPFs were already padded to 11 characters above)
mayors_0408 <- mayors_0408 %>%
  dplyr::filter(cpf_candidate != "#NULO#") # remove 1 case where mayor has no CPF
nrow(mayors_0408) # 5520 municipalities

# Identify mayors elected for the 2000 - 2004 term
mayors_0004 <- d00 %>%
  # Keep only elected mayoral candidates whose candidacy is not in one of the invalid categories
  dplyr::filter(DESCRICAO_CARGO == "PREFEITO", # Candidates for mayoral office
                DESC_SIT_TOT_TURNO == "ELEITO", # Who were elected
                !(DES_SITUACAO_CANDIDATURA %in% invalid)) %>% # And whose candidacy was not invalid
  # Keep and rename variables of interest
  dplyr::select(code_municipality_tse = SIGLA_UE, # TSE's municipality code
                code_candidate = SEQUENCIAL_CANDIDATO, # TSE's candidate code
                cpf_candidate = CPF_CANDIDATO, # Candidate's CPF (a government unique ID for individuals, similar to the social security number in the US)
                code_party = NUMERO_PARTIDO, # TSE's party code
                name_party = SIGLA_PARTIDO, # Party name
                round = NUM_TURNO, # Round of elections
                candidate_situation = DES_SITUACAO_CANDIDATURA, # Situation of the candidacy
                election_description = DESCRICAO_ELEICAO) # Description of the election
# Clean supplementary elections
# In 2000 there are no supplementary elections in the data, but there are municipalities with more than one elected mayor
mayors_0004[["supplementary_election"]] <- 0
nrow(mayors_0004) # 5247 municipalities
# Rows of second and later elected mayors within the same municipality
duplicated_muns <- mayors_0004[duplicated(mayors_0004$code_municipality_tse), ]
nrow(duplicated_muns) # 17 are repeated
# Drop municipalities with more than one elected mayor (we do not know which of the repeated elections was the valid one),
# then drop mayors without a CPF and left-pad the remaining CPFs with zeroes to 11 characters
mayors_0004 <- mayors_0004 %>%
  dplyr::filter(!(code_municipality_tse %in% duplicated_muns$code_municipality_tse),
                cpf_candidate != "#NULO#") %>% # remove 18 cases where mayor has no CPF
  dplyr::mutate(cpf_candidate = str_pad(cpf_candidate, width = 11, side = "left", pad = "0"))
nrow(mayors_0004) # 5195 municipalities

# Extract data on the electoral performance of candidates ---------------------------------------------------

# Votes in the 2016 election
votes_1620 <- v16 %>%
  # Keep only vote records of approved ("APTO") candidates for mayor whose candidacy is not in one of the invalid categories
  dplyr::filter(DESCRICAO_CARGO == "PREFEITO" &
                  DESC_SIT_CAND_SUPERIOR == "APTO" &
                  !(DESC_SIT_CANDIDATO %in% invalid)) %>%
  dplyr::select(code_municipality_tse = SIGLA_UE, # TSE's municipality code
                code_candidate = SQ_CANDIDATO, # TSE's candidate code
                code_party = NUMERO_PARTIDO, # TSE's party code
                round = NUM_TURNO, # Round of elections
                votes = TOTAL_VOTOS, # Number of votes
                candidate_result = DESC_SIT_CAND_TOT, # Result obtained by the candidate
                election_description = DESCRICAO_ELEICAO) %>% # Description of the election
  dplyr::mutate(votes = as.numeric(votes))
# Municipalities without an identified supplementary election: keep all their vote records
votes_1620_regular <- votes_1620 %>%
  dplyr::filter(!(code_municipality_tse %in% supplementary_elections_1620$code_municipality_tse))
# Municipalities with an identified supplementary election: keep only the records that do not belong to the regular election.
# The municipality condition makes the two subsets mutually exclusive. Without it, a non-regular record from a municipality
# that is NOT in supplementary_elections_1620 would be present in both subsets and its votes would be counted twice after rbind().
votes_1620_supplementary <- votes_1620 %>%
  dplyr::filter(code_municipality_tse %in% supplementary_elections_1620$code_municipality_tse &
                  election_description != "ELEIÇÕES MUNICIPAIS 2016")
votes_1620 <- rbind(votes_1620_regular, votes_1620_supplementary)

# Votes in the 2012 election
votes_1216 <- v12 %>%
  # Keep only vote records of approved ("APTO") candidates for mayor whose candidacy is not in one of the invalid categories
  dplyr::filter(DESCRICAO_CARGO == "PREFEITO" &
                  DESC_SIT_CAND_SUPERIOR == "APTO" &
                  !(DESC_SIT_CANDIDATO %in% invalid)) %>%
  dplyr::select(code_municipality_tse = SIGLA_UE, # TSE's municipality code
                code_candidate = SQ_CANDIDATO, # TSE's candidate code
                code_party = NUMERO_PARTIDO, # TSE's party code
                round = NUM_TURNO, # Round of elections
                votes = TOTAL_VOTOS, # Number of votes
                candidate_result = DESC_SIT_CAND_TOT, # Result obtained by the candidate
                election_description = DESCRICAO_ELEICAO) %>% # Description of the election
  dplyr::mutate(votes = as.numeric(votes))
# Municipalities without an identified supplementary election: keep all their vote records
votes_1216_regular <- votes_1216 %>%
  dplyr::filter(!(code_municipality_tse %in% supplementary_elections_1216$code_municipality_tse))
# Municipalities with an identified supplementary election: keep only the records that do not belong to the regular election.
# The municipality condition makes the two subsets mutually exclusive. Without it, a non-regular record from a municipality
# that is NOT in supplementary_elections_1216 would be present in both subsets and its votes would be counted twice after rbind().
votes_1216_supplementary <- votes_1216 %>%
  dplyr::filter(code_municipality_tse %in% supplementary_elections_1216$code_municipality_tse &
                  election_description != "ELEIÇÃO MUNICIPAL 2012")
votes_1216 <- rbind(votes_1216_regular, votes_1216_supplementary)

# Votes in the 2008 election
votes_0812 <- v08 %>%
  # Keep only vote records of approved ("APTO") candidates for mayor whose candidacy is not in one of the invalid categories
  dplyr::filter(DESCRICAO_CARGO == "PREFEITO" &
                  DESC_SIT_CAND_SUPERIOR == "APTO" &
                  !(DESC_SIT_CANDIDATO %in% invalid)) %>%
  dplyr::select(code_municipality_tse = SIGLA_UE, # TSE's municipality code
                code_candidate = SQ_CANDIDATO, # TSE's candidate code
                code_party = NUMERO_PARTIDO, # TSE's party code
                round = NUM_TURNO, # Round of elections
                votes = TOTAL_VOTOS, # Number of votes
                candidate_result = DESC_SIT_CAND_TOT, # Result obtained by the candidate
                election_description = DESCRICAO_ELEICAO) %>% # Description of the election
  dplyr::mutate(votes = as.numeric(votes))
# Municipalities without an identified supplementary election: keep all their vote records
votes_0812_regular <- votes_0812 %>%
  dplyr::filter(!(code_municipality_tse %in% supplementary_elections_0812$code_municipality_tse))
# Municipalities with an identified supplementary election: keep only the records that do not belong to the regular election.
# The municipality condition makes the two subsets mutually exclusive. Without it, a non-regular record from a municipality
# that is NOT in supplementary_elections_0812 would be present in both subsets and its votes would be counted twice after rbind().
votes_0812_supplementary <- votes_0812 %>%
  dplyr::filter(code_municipality_tse %in% supplementary_elections_0812$code_municipality_tse &
                  election_description != "ELEIÇÕES 2008")
votes_0812 <- rbind(votes_0812_regular, votes_0812_supplementary)

# Votes in the 2004 election
votes_0408 <- v04 %>%
  # Keep only vote records of candidates for mayor whose candidacy is not in one of the invalid categories
  # (unlike the other years, no filter on DESC_SIT_CAND_SUPERIOR is applied here)
  dplyr::filter(DESCRICAO_CARGO == "PREFEITO",
                !(DESC_SIT_CANDIDATO %in% invalid)) %>%
  dplyr::select(code_municipality_tse = SIGLA_UE, # TSE's municipality code
                code_candidate = SQ_CANDIDATO, # TSE's candidate code
                code_party = NUMERO_PARTIDO, # TSE's party code
                round = NUM_TURNO, # Round of elections
                votes = TOTAL_VOTOS, # Number of votes
                candidate_result = DESC_SIT_CAND_TOT, # Result obtained by the candidate
                election_description = DESCRICAO_ELEICAO) # Description of the election
# Store vote counts as numbers
votes_0408$votes <- as.numeric(votes_0408$votes)

# Votes in the 2000 election
votes_0004 <- v00 %>%
  # Keep only vote records of approved ("APTO") candidates for mayor whose candidacy is not in one of the invalid categories
  dplyr::filter(DESCRICAO_CARGO == "PREFEITO",
                DESC_SIT_CAND_SUPERIOR == "APTO",
                !(DESC_SIT_CANDIDATO %in% invalid)) %>%
  dplyr::select(code_municipality_tse = SIGLA_UE, # TSE's municipality code
                code_candidate = SQ_CANDIDATO, # TSE's candidate code
                code_party = NUMERO_PARTIDO, # TSE's party code
                round = NUM_TURNO, # Round of elections
                votes = TOTAL_VOTOS, # Number of votes
                candidate_result = DESC_SIT_CAND_TOT, # Result obtained by the candidate
                election_description = DESCRICAO_ELEICAO) # Description of the election
# Store vote counts as numbers
votes_0004$votes <- as.numeric(votes_0004$votes)


# Merge with identifiers for municipalities ---------------------

municipality_identifiers <- read_csv("../../datasets/downloaded/other/basedosdados_municipality_identifiers.csv")

# Keep the three identifiers used in this file, under shorter names
municipalities <- municipality_identifiers %>%
  dplyr::select(cod_ibge = id_municipio_6, cod_tse = id_municipio_tse, cod_uf = id_uf)

# Generate the 5-digit version of the TSE code (with leading zeroes)
municipalities$code_municipality_tse <- str_pad(as.character(municipalities$cod_tse), 5, side = "left", pad = "0") # Add leading zeroes (TSE codes are all 5-digit)

# Merge mayor data, keeping separate datasets for each election cycle.
# Every municipality is kept (left join); those without a matched mayor get NA in the mayor columns.
# The join key is spelled out so that the merge does not silently depend on which column names the two tables happen to share.
m16 <- left_join(municipalities, mayors_1620, by = "code_municipality_tse")
m12 <- left_join(municipalities, mayors_1216, by = "code_municipality_tse")
m08 <- left_join(municipalities, mayors_0812, by = "code_municipality_tse")
m04 <- left_join(municipalities, mayors_0408, by = "code_municipality_tse")
m00 <- left_join(municipalities, mayors_0004, by = "code_municipality_tse")

# Generate variables for the electoral performance of the incumbent and their strongest challenger ------------------------------------------------------

# 2016 election
# Indicators computed for each municipality; they stay NA unless the loop below fills them in
m16$incumbent_mayor_ran <- NA
m16$total_votes <- NA
m16$electoral_concentration <- NA
m16$incumbent_mayor_voteshare <- NA
m16$strongest_opponent_voteshare <- NA

for(i in seq_len(nrow(m16))){ # seq_len() gives an empty loop, rather than c(1, 0), if there are no rows
  # Get first-round votes in the municipality
  votes_here <- subset(votes_1620, votes_1620$code_municipality_tse == m16$code_municipality_tse[i] & votes_1620$round==1)
  # List of first-round candidates
  candidates_here <- subset(candidates_1620, candidates_1620$code_municipality_tse == m16$code_municipality_tse[i] & candidates_1620$round==1)
  # Check if the incumbent ran. The incumbent is the mayor recorded for this municipality in m12 (elected for the 2012 - 2016 term)
  cpf_incumbent <- m12[which(m12$code_municipality_tse==m16$code_municipality_tse[i]),"cpf_candidate"][[1]]
  m16$incumbent_mayor_ran[i] <- ifelse(cpf_incumbent %in% candidates_here$cpf_candidate,1,0)
  votes_here_by_candidate <- subset(votes_here, votes_here$round==1) %>%
    group_by(code_candidate) %>% # Some candidates have votes reported in multiple lines
    dplyr::summarise(votes = sum(votes))
  # Electoral concentration: sum of the squared first-round vote shares of all candidates
  m16$electoral_concentration[i] <- sum((votes_here_by_candidate$votes/sum(votes_here_by_candidate$votes))^2)
  # Vote numbers in the first round
  # Sum number of votes
  m16$total_votes[i] <- sum(votes_here_by_candidate$votes)
  # Vote shares are only computed when the incumbent ran; otherwise they stay NA
  if(m16$incumbent_mayor_ran[i]==0){
    next
  }
  # Vote share of incumbent
  code_incumbent <- candidates_here[which(candidates_here$cpf_candidate==cpf_incumbent & candidates_here$round==1),"code_candidate"][[1]]
  m16$incumbent_mayor_voteshare[i] <- sum(votes_here_by_candidate[which(votes_here_by_candidate$code_candidate==code_incumbent),"votes"][[1]]/sum(votes_here_by_candidate$votes))
  # Vote share of the most voted candidate other than the incumbent
  votes_here_by_opponent <- subset(votes_here_by_candidate, votes_here_by_candidate$code_candidate != code_incumbent) %>%
    arrange(desc(votes))
  m16$strongest_opponent_voteshare[i] <- sum(votes_here_by_opponent[1,"votes"][[1]]/sum(votes_here_by_candidate$votes))
  if(!is.na(m16$round[i]) && m16$round[i]==2){ # If the election was decided in the second round, record electoral performance in that round instead of the first one
    # Vote tallies
    votes_here <- subset(votes_1620, votes_1620$code_municipality_tse == m16$code_municipality_tse[i] & votes_1620$round==2)
    # List of candidates
    candidates_here <- subset(candidates_1620, candidates_1620$code_municipality_tse == m16$code_municipality_tse[i] & candidates_1620$round==2)
    # Check if the incumbent made it to the second round (this overwrites the first-round value of incumbent_mayor_ran)
    m16$incumbent_mayor_ran[i] <- ifelse(cpf_incumbent %in% candidates_here$cpf_candidate,1,0)
    if(m16$incumbent_mayor_ran[i]==0){
      # The first-round shares computed above are discarded
      m16$incumbent_mayor_voteshare[i] <- NA
      m16$strongest_opponent_voteshare[i] <- NA
      next
    }
    votes_here_by_candidate <- subset(votes_here, votes_here$round==2) %>%
      group_by(code_candidate) %>% # Some candidates have votes reported in multiple lines
      dplyr::summarise(votes = sum(votes))
    # code_incumbent is the candidate code found among the first-round candidates
    m16$incumbent_mayor_voteshare[i] <- sum(votes_here_by_candidate[which(votes_here_by_candidate$code_candidate==code_incumbent),"votes"][[1]]/sum(votes_here_by_candidate$votes))
    votes_here_by_opponent <- subset(votes_here_by_candidate, votes_here_by_candidate$code_candidate != code_incumbent) %>%
      arrange(desc(votes))
    m16$strongest_opponent_voteshare[i] <- sum(votes_here_by_opponent[1,"votes"][[1]]/sum(votes_here_by_candidate$votes))
  }
}

# 2012 election
# Indicators computed for each municipality; they stay NA unless the loop below fills them in
m12$incumbent_mayor_ran <- NA
m12$total_votes <- NA
m12$electoral_concentration <- NA
m12$incumbent_mayor_voteshare <- NA
m12$strongest_opponent_voteshare <- NA

for(i in seq_len(nrow(m12))){ # seq_len() gives an empty loop, rather than c(1, 0), if there are no rows
  # Get first-round votes in the municipality
  votes_here <- subset(votes_1216, votes_1216$code_municipality_tse == m12$code_municipality_tse[i] & votes_1216$round==1)
  # List of first-round candidates
  candidates_here <- subset(candidates_1216, candidates_1216$code_municipality_tse == m12$code_municipality_tse[i] & candidates_1216$round==1)
  # Check if the incumbent ran. The incumbent is the mayor recorded for this municipality in m08 (elected for the 2008 - 2012 term)
  cpf_incumbent <- m08[which(m08$code_municipality_tse==m12$code_municipality_tse[i]),"cpf_candidate"][[1]]
  m12$incumbent_mayor_ran[i] <- ifelse(cpf_incumbent %in% candidates_here$cpf_candidate,1,0)
  votes_here_by_candidate <- subset(votes_here, votes_here$round==1) %>%
    group_by(code_candidate) %>% # Some candidates have votes reported in multiple lines
    dplyr::summarise(votes = sum(votes))
  # Electoral concentration: sum of the squared first-round vote shares of all candidates
  m12$electoral_concentration[i] <- sum((votes_here_by_candidate$votes/sum(votes_here_by_candidate$votes))^2)
  # Vote numbers in the first round
  # Sum number of votes
  m12$total_votes[i] <- sum(votes_here_by_candidate$votes)
  # Vote shares are only computed when the incumbent ran; otherwise they stay NA
  if(m12$incumbent_mayor_ran[i]==0){
    next
  }
  # Vote share of incumbent
  code_incumbent <- candidates_here[which(candidates_here$cpf_candidate==cpf_incumbent & candidates_here$round==1),"code_candidate"][[1]]
  m12$incumbent_mayor_voteshare[i] <- sum(votes_here_by_candidate[which(votes_here_by_candidate$code_candidate==code_incumbent),"votes"][[1]]/sum(votes_here_by_candidate$votes))
  # Vote share of the most voted candidate other than the incumbent
  votes_here_by_opponent <- subset(votes_here_by_candidate, votes_here_by_candidate$code_candidate != code_incumbent) %>%
    arrange(desc(votes))
  m12$strongest_opponent_voteshare[i] <- sum(votes_here_by_opponent[1,"votes"][[1]]/sum(votes_here_by_candidate$votes))
  if(!is.na(m12$round[i]) && m12$round[i]==2){ # If the election was decided in the second round, record electoral performance in that round instead of the first one
    # Vote tallies
    votes_here <- subset(votes_1216, votes_1216$code_municipality_tse == m12$code_municipality_tse[i] & votes_1216$round==2)
    # List of candidates
    candidates_here <- subset(candidates_1216, candidates_1216$code_municipality_tse == m12$code_municipality_tse[i] & candidates_1216$round==2)
    # Check if the incumbent made it to the second round (this overwrites the first-round value of incumbent_mayor_ran)
    m12$incumbent_mayor_ran[i] <- ifelse(cpf_incumbent %in% candidates_here$cpf_candidate,1,0)
    if(m12$incumbent_mayor_ran[i]==0){
      # The first-round shares computed above are discarded
      m12$incumbent_mayor_voteshare[i] <- NA
      m12$strongest_opponent_voteshare[i] <- NA
      next
    }
    votes_here_by_candidate <- subset(votes_here, votes_here$round==2) %>%
      group_by(code_candidate) %>% # Some candidates have votes reported in multiple lines
      dplyr::summarise(votes = sum(votes))
    # code_incumbent is the candidate code found among the first-round candidates
    m12$incumbent_mayor_voteshare[i] <- sum(votes_here_by_candidate[which(votes_here_by_candidate$code_candidate==code_incumbent),"votes"][[1]]/sum(votes_here_by_candidate$votes))
    votes_here_by_opponent <- subset(votes_here_by_candidate, votes_here_by_candidate$code_candidate != code_incumbent) %>%
      arrange(desc(votes))
    m12$strongest_opponent_voteshare[i] <- sum(votes_here_by_opponent[1,"votes"][[1]]/sum(votes_here_by_candidate$votes))
  }
}

# 2008 election
# Indicators computed for each municipality; they stay NA unless the loop below fills them in
m08$incumbent_mayor_ran <- NA
m08$total_votes <- NA
m08$electoral_concentration <- NA
m08$incumbent_mayor_voteshare <- NA
m08$strongest_opponent_voteshare <- NA

for(i in seq_len(nrow(m08))){ # seq_len() gives an empty loop, rather than c(1, 0), if there are no rows
  # Get first-round votes in the municipality
  votes_here <- subset(votes_0812, votes_0812$code_municipality_tse == m08$code_municipality_tse[i] & votes_0812$round==1)
  # List of first-round candidates
  candidates_here <- subset(candidates_0812, candidates_0812$code_municipality_tse == m08$code_municipality_tse[i] & candidates_0812$round==1)
  # Check if the incumbent ran. The incumbent is the mayor recorded for this municipality in m04 (elected for the 2004 - 2008 term)
  cpf_incumbent <- m04[which(m04$code_municipality_tse==m08$code_municipality_tse[i]),"cpf_candidate"][[1]]
  m08$incumbent_mayor_ran[i] <- ifelse(cpf_incumbent %in% candidates_here$cpf_candidate,1,0)
  votes_here_by_candidate <- subset(votes_here, votes_here$round==1) %>%
    group_by(code_candidate) %>% # Some candidates have votes reported in multiple lines
    dplyr::summarise(votes = sum(votes))
  # Electoral concentration: sum of the squared first-round vote shares of all candidates
  m08$electoral_concentration[i] <- sum((votes_here_by_candidate$votes/sum(votes_here_by_candidate$votes))^2)
  # Vote numbers in the first round
  # Sum number of votes
  m08$total_votes[i] <- sum(votes_here_by_candidate$votes)
  # Vote shares are only computed when the incumbent ran; otherwise they stay NA
  if(m08$incumbent_mayor_ran[i]==0){
    next
  }
  # Vote share of incumbent
  code_incumbent <- candidates_here[which(candidates_here$cpf_candidate==cpf_incumbent & candidates_here$round==1),"code_candidate"][[1]]
  m08$incumbent_mayor_voteshare[i] <- sum(votes_here_by_candidate[which(votes_here_by_candidate$code_candidate==code_incumbent),"votes"][[1]]/sum(votes_here_by_candidate$votes))
  # Vote share of the most voted candidate other than the incumbent
  votes_here_by_opponent <- subset(votes_here_by_candidate, votes_here_by_candidate$code_candidate != code_incumbent) %>%
    arrange(desc(votes))
  m08$strongest_opponent_voteshare[i] <- sum(votes_here_by_opponent[1,"votes"][[1]]/sum(votes_here_by_candidate$votes))
  if(!is.na(m08$round[i]) && m08$round[i]==2){ # If the election was decided in the second round, record electoral performance in that round instead of the first one
    # Vote tallies
    votes_here <- subset(votes_0812, votes_0812$code_municipality_tse == m08$code_municipality_tse[i] & votes_0812$round==2)
    # List of candidates
    candidates_here <- subset(candidates_0812, candidates_0812$code_municipality_tse == m08$code_municipality_tse[i] & candidates_0812$round==2)
    # Check if the incumbent made it to the second round (this overwrites the first-round value of incumbent_mayor_ran)
    m08$incumbent_mayor_ran[i] <- ifelse(cpf_incumbent %in% candidates_here$cpf_candidate,1,0)
    if(m08$incumbent_mayor_ran[i]==0){
      # The first-round shares computed above are discarded
      m08$incumbent_mayor_voteshare[i] <- NA
      m08$strongest_opponent_voteshare[i] <- NA
      next
    }
    votes_here_by_candidate <- subset(votes_here, votes_here$round==2) %>%
      group_by(code_candidate) %>% # Some candidates have votes reported in multiple lines
      dplyr::summarise(votes = sum(votes))
    # code_incumbent is the candidate code found among the first-round candidates
    m08$incumbent_mayor_voteshare[i] <- sum(votes_here_by_candidate[which(votes_here_by_candidate$code_candidate==code_incumbent),"votes"][[1]]/sum(votes_here_by_candidate$votes))
    votes_here_by_opponent <- subset(votes_here_by_candidate, votes_here_by_candidate$code_candidate != code_incumbent) %>%
      arrange(desc(votes))
    m08$strongest_opponent_voteshare[i] <- sum(votes_here_by_opponent[1,"votes"][[1]]/sum(votes_here_by_candidate$votes))
  }
}

# 2004 election
# Indicators computed for each municipality; they stay NA unless the loop below fills them in
m04$incumbent_mayor_ran <- NA
m04$total_votes <- NA
m04$electoral_concentration <- NA
m04$incumbent_mayor_voteshare <- NA
m04$strongest_opponent_voteshare <- NA

for(i in seq_len(nrow(m04))){ # seq_len() gives an empty loop, rather than c(1, 0), if there are no rows
  # Get first-round votes in the municipality
  votes_here <- subset(votes_0408, votes_0408$code_municipality_tse == m04$code_municipality_tse[i] & votes_0408$round==1)
  # List of first-round candidates
  candidates_here <- subset(candidates_0408, candidates_0408$code_municipality_tse == m04$code_municipality_tse[i] & candidates_0408$round==1)
  # Check if the incumbent ran. The incumbent is the mayor recorded for this municipality in m00 (elected for the 2000 - 2004 term)
  cpf_incumbent <- m00[which(m00$code_municipality_tse==m04$code_municipality_tse[i]),"cpf_candidate"][[1]]
  m04$incumbent_mayor_ran[i] <- ifelse(cpf_incumbent %in% candidates_here$cpf_candidate,1,0)
  votes_here_by_candidate <- subset(votes_here, votes_here$round==1) %>%
    group_by(code_candidate) %>% # Some candidates have votes reported in multiple lines
    dplyr::summarise(votes = sum(votes))
  # Electoral concentration: sum of the squared first-round vote shares of all candidates
  m04$electoral_concentration[i] <- sum((votes_here_by_candidate$votes/sum(votes_here_by_candidate$votes))^2)
  # Vote numbers in the first round
  # Sum number of votes
  m04$total_votes[i] <- sum(votes_here_by_candidate$votes)
  # Vote shares are only computed when the incumbent ran; otherwise they stay NA
  if(m04$incumbent_mayor_ran[i]==0){
    next
  }
  # Vote share of incumbent
  code_incumbent <- candidates_here[which(candidates_here$cpf_candidate==cpf_incumbent & candidates_here$round==1),"code_candidate"][[1]]
  m04$incumbent_mayor_voteshare[i] <- sum(votes_here_by_candidate[which(votes_here_by_candidate$code_candidate==code_incumbent),"votes"][[1]]/sum(votes_here_by_candidate$votes))
  # Vote share of the most voted candidate other than the incumbent
  votes_here_by_opponent <- subset(votes_here_by_candidate, votes_here_by_candidate$code_candidate != code_incumbent) %>%
    arrange(desc(votes))
  m04$strongest_opponent_voteshare[i] <- sum(votes_here_by_opponent[1,"votes"][[1]]/sum(votes_here_by_candidate$votes))
  if(!is.na(m04$round[i]) && m04$round[i]==2){ # If the election was decided in the second round, record electoral performance in that round instead of the first one
    # Vote tallies
    votes_here <- subset(votes_0408, votes_0408$code_municipality_tse == m04$code_municipality_tse[i] & votes_0408$round==2)
    # List of candidates
    candidates_here <- subset(candidates_0408, candidates_0408$code_municipality_tse == m04$code_municipality_tse[i] & candidates_0408$round==2)
    # Check if the incumbent made it to the second round (this overwrites the first-round value of incumbent_mayor_ran)
    m04$incumbent_mayor_ran[i] <- ifelse(cpf_incumbent %in% candidates_here$cpf_candidate,1,0)
    if(m04$incumbent_mayor_ran[i]==0){
      # The first-round shares computed above are discarded
      m04$incumbent_mayor_voteshare[i] <- NA
      m04$strongest_opponent_voteshare[i] <- NA
      next
    }
    votes_here_by_candidate <- subset(votes_here, votes_here$round==2) %>%
      group_by(code_candidate) %>% # Some candidates have votes reported in multiple lines
      dplyr::summarise(votes = sum(votes))
    # code_incumbent is the candidate code found among the first-round candidates
    m04$incumbent_mayor_voteshare[i] <- sum(votes_here_by_candidate[which(votes_here_by_candidate$code_candidate==code_incumbent),"votes"][[1]]/sum(votes_here_by_candidate$votes))
    votes_here_by_opponent <- subset(votes_here_by_candidate, votes_here_by_candidate$code_candidate != code_incumbent) %>%
      arrange(desc(votes))
    m04$strongest_opponent_voteshare[i] <- sum(votes_here_by_opponent[1,"votes"][[1]]/sum(votes_here_by_candidate$votes))
  }
}

# Gather indicators for party of the incumbent ---------------------------------

# Helper: add 0/1 indicators describing the party of the incumbent mayor,
# i.e. the party recorded in `previous` for the same municipality.
#   current       - data frame for the election cycle being described
#   previous      - data frame in which the incumbent's party is looked up
#   aligned_party - TSE party number coded as "aligned" for this cycle
# Returns `current` with five added columns: incumbent_party_pt,
# incumbent_party_psdb, incumbent_party_pmdb, incumbent_party_large and
# incumbent_party_aligned.
# Each party has a unique identifier assigned by TSE: http://www.tse.jus.br/partidos/partidos-politicos/registrados-no-tse
add_incumbent_party_indicators <- function(current, previous, aligned_party) {
  # match() gives the first row of `previous` with the same municipality code,
  # or NA when the municipality does not appear in `previous`
  prev_row <- match(current$code_municipality_tse, previous$code_municipality_tse)
  incumbent_party <- previous[["code_party"]][prev_row]

  current$incumbent_party_pt <- ifelse(incumbent_party == 13, 1, 0)
  current$incumbent_party_psdb <- ifelse(incumbent_party == 45, 1, 0)
  current$incumbent_party_pmdb <- ifelse(incumbent_party == 15, 1, 0)

  # "Large" parties: PT (13), PSDB (45), PMDB (15) and party number 11
  # (presumably PP -- confirm against the TSE list)
  large <- ifelse(incumbent_party %in% c(13, 45, 15, 11), 1, 0)
  # %in% never returns NA, so mark municipalities without a previous-cycle
  # record as missing explicitly, in line with the other indicators
  large[is.na(prev_row)] <- NA
  current$incumbent_party_large <- large

  current$incumbent_party_aligned <- ifelse(incumbent_party == aligned_party, 1, 0)
  current
}

# The incumbent in each cycle is the mayor elected in the preceding cycle.
# The "aligned" party is PMDB (15) for 2016 and PT (13) for earlier cycles
# (presumably the party of the sitting president at election time -- confirm)
m16 <- add_incumbent_party_indicators(m16, m12, aligned_party = 15)
m12 <- add_incumbent_party_indicators(m12, m08, aligned_party = 13)
m08 <- add_incumbent_party_indicators(m08, m04, aligned_party = 13)
# NOTE(review): for 2004 the party is looked up in m04 itself, i.e. the party
# of the 2004 winner rather than of the mayor elected in 2000. This reproduces
# the original behaviour; confirm whether the 2000-cycle data should be used.
m04 <- add_incumbent_party_indicators(m04, m04, aligned_party = 13)

# Tag each cycle with its election year before stacking
m16[["year"]] <- 2016
m12[["year"]] <- 2012
m08[["year"]] <- 2008
m04[["year"]] <- 2004

# Bind all election cycles and generate year fixed effects ------------------------------------------------

# Stack the four cycles, most recent first, into a single data set
election_data <- do.call(rbind, list(m16, m12, m08, m04))

# Margin of the strongest opponent over the incumbent mayor (vote shares)
election_data[["challenger_margin"]] <- with(
  election_data,
  strongest_opponent_voteshare - incumbent_mayor_voteshare
)

# Year fixed effects: one 0/1 dummy per election year
election_data[["year_2016"]] <- as.numeric(election_data[["year"]] == 2016)
election_data[["year_2012"]] <- as.numeric(election_data[["year"]] == 2012)
election_data[["year_2008"]] <- as.numeric(election_data[["year"]] == 2008)
election_data[["year_2004"]] <- as.numeric(election_data[["year"]] == 2004)

# Remove supplementary elections ------------------------------------------

# Drop observations whose winner was decided in a supplementary election.
# Such elections are held when the electoral courts invalidate the election
# that took place on the regular date.
# TSE does not report when they are held, but it is typically months or years
# after the official date.
# Fewer than 1.5% of municipalities in the data have supplementary elections,
# and leaving them in the data does not alter the results.

# which() discards rows where the flag is NA, as subset() would
e <- election_data[which(election_data$supplementary_election == 0), , drop = FALSE]

# Remove superfluous variables --------------------------------------------

# Keep only the identifiers, the challenger margin, the year dummies, the
# incumbent-party indicators and the two electoral aggregates
e <- dplyr::select(
  e,
  cod_ibge, year, challenger_margin,
  year_2004, year_2008, year_2012, year_2016,
  incumbent_party_pt, incumbent_party_psdb, incumbent_party_pmdb,
  incumbent_party_large, incumbent_party_aligned,
  total_votes, electoral_concentration
)

# Export ------------------------------------------------------------------

# Write the analysis data set to disk (path is relative to the working directory)
e %>%
  readr::write_csv("../../datasets/analysis/election_data.csv")

# Notes: R version, platform, and loaded packages -------------------------

# Report the session for all attached packages (the default); the output from
# the original run is reproduced in the comments below
sessionInfo()

# R version 4.2.1 (2022-06-23)
# Platform: aarch64-apple-darwin20 (64-bit)
# Running under: macOS Monterey 12.1
# 
# Matrix products: default
# LAPACK: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRlapack.dylib
# 
# locale:
#   [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
# 
# attached base packages:
#   [1] stats     graphics  grDevices utils     datasets  methods   base     
# 
# other attached packages:
#   [1] electionsBR_0.3.2 codebook_0.9.2    here_1.0.1        forcats_0.5.2    
# [5] stringr_1.5.0     dplyr_1.1.2       purrr_0.3.4       readr_2.1.2      
# [9] tidyr_1.2.0       tibble_3.2.1      ggplot2_3.3.6     tidyverse_1.3.2  
# 
# loaded via a namespace (and not attached):
#   [1] lubridate_1.8.0     assertthat_0.2.1    rprojroot_2.0.3     digest_0.6.29      
# [5] utf8_1.2.3          R6_2.5.1            cellranger_1.1.0    backports_1.4.1    
# [9] reprex_2.0.2        labelled_2.9.1      httr_1.4.4          pillar_1.9.0       
# [13] rlang_1.1.1         googlesheets4_1.0.1 readxl_1.4.1        rstudioapi_0.14    
# [17] jquerylib_0.1.4     DT_0.24             googledrive_2.0.0   htmlwidgets_1.5.4  
# [21] bit_4.0.4           munsell_0.5.0       broom_1.0.1         compiler_4.2.1     
# [25] modelr_0.1.9        pkgconfig_2.0.3     htmltools_0.5.3     tidyselect_1.2.0   
# [29] fansi_1.0.4         crayon_1.5.1        tzdb_0.3.0          dbplyr_2.2.1       
# [33] withr_2.5.0         grid_4.2.1          jsonlite_1.8.0      gtable_0.3.1       
# [37] lifecycle_1.0.3     DBI_1.1.3           magrittr_2.0.3      scales_1.2.1       
# [41] cli_3.6.1           stringi_1.7.12      vroom_1.5.7         cachem_1.0.6       
# [45] fs_1.5.2            xml2_1.3.3          bslib_0.4.0         ellipsis_0.3.2     
# [49] generics_0.1.3      vctrs_0.6.2         tools_4.2.1         bit64_4.0.5        
# [53] glue_1.6.2          hms_1.1.2           crosstalk_1.2.0     yaml_2.3.5         
# [57] parallel_4.2.1      fastmap_1.1.0       colorspace_2.0-3    gargle_1.2.0       
# [61] rvest_1.0.3         haven_2.5.1         sass_0.4.2     